#!/usr/bin/python

import urllib
import urllib2
import re
import json
import sys
import copy
# Python 2 only idiom: sys is reloaded first and then the process-wide default
# string encoding is switched to UTF-8.
# NOTE(review): presumably needed so the str() conversions in writeJson accept
# non-ASCII taxon names -- confirm before removing.
reload(sys)
sys.setdefaultencoding("utf-8")

def grabUrl(url,id):
	"""Fetch one tree node over HTTP and return the decoded JSON payload.

	Sends a GET request to ``url`` carrying the query parameters ``id``
	(the given value) and ``start`` (always "0"), using browser-like
	request headers, and parses the response body as JSON.

	Args:
		url: Endpoint to query, without a query string.
		id: Value sent as the ``id`` query parameter.

	Returns:
		Whatever ``json.loads`` produces for the response body.

	Raises:
		urllib2.URLError: If the request cannot be completed.
		ValueError: If the response body is not valid JSON.
	"""
	query=urllib.urlencode({'id':id,'start':"0"})
	userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
	# NOTE(review): the Host header is fixed to www.catalogueoflife.org, so
	# ``url`` is presumably always expected to point at that server.
	headers={
		'Upgrade-Insecure-Requests':'1',
		'Host':'www.catalogueoflife.org',
		'Accept-Language':'zh-CN,zh;q=0.8',
		'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
		'User-Agent':userAgent,
	}
	request=urllib2.Request(url+"?"+query,headers=headers)
	response=urllib2.urlopen(request)
	# Close the connection explicitly, even when read() fails, instead of
	# leaving the socket open until the response object is garbage collected.
	try:
		body=response.read()
	finally:
		response.close()
	return json.loads(body)
	
def writeJson(pid,files,status):
	"""Append one row per entry of ``files["items"]`` to the module-level ``list``.

	Every row is a list of five strings: ``pid``, the entry's ``id``,
	``name`` and ``rank``, and ``status``. An entry whose ``rank`` is the
	empty string has it replaced by "null" in place, so the ``files``
	argument itself is modified.

	Args:
		pid: Identifier stored in the first column of each row.
		files: Mapping with an "items" sequence of mappings that provide
			the keys "id", "name" and "rank".
		status: Value stored in the last column of each row.
	"""
	for entry in files["items"]:
		# Normalise a blank rank before it is stringified below.
		if entry["rank"]=="":
			entry["rank"]="null"
		row=[str(pid)]
		row.extend(str(entry[key]) for key in ("id","name","rank"))
		row.append(str(status))
		list.append(row)
		#list.append("%s %s %s %s %s \n"%(pid,item["id"],str(item["name"]),str(item["rank"]),status))

def listCompile(list,list1):
	"""Write ``list`` to test1.txt and start another pass if it has grown.

	Each row is written on its own line with its fields joined by two tab
	characters. When ``list`` and ``list1`` have different lengths,
	``listShow(list)`` is called; ``listShow`` in turn calls this function
	again, so the two recurse until a pass adds no rows.

	Args:
		list: Sequence of rows, each a sequence of strings.
		list1: Sequence compared with ``list`` by length only; its
			contents are not read.
	"""
	# The with-block closes the file even if a row cannot be joined or written.
	with open("test1.txt","w") as fp:
		for row in list:
			fp.write('\t\t'.join(row)+"\n")
	if len(list)!=len(list1):
		listShow(list)

def listShow(list):
	"""Snapshot ``list`` to test.txt, expand its pending rows, then recompile.

	A deep copy of ``list`` is written to test.txt, one row per line with
	fields joined by two tab characters. Every row whose fifth field is
	'0' is then expanded: its second field is passed to ``grabUrl`` as the
	id, the result is handed to ``writeJson`` with that id as pid, and the
	row's fifth field is set to '1'. Finally ``listCompile`` is called with
	the list and the earlier copy.

	Args:
		list: List of rows, each a list of at least five strings. Because
			``writeJson`` appends to the module-level ``list``, newly
			fetched rows only show up here when this argument is that
			same object.
	"""
	list1=copy.deepcopy(list)
	# The with-block closes the file even if a row cannot be joined or written.
	with open("test.txt","w") as fp:
		for row in list1:
			fp.write('\t\t'.join(row)+"\n")

	# Walk a snapshot of the current rows: rows appended by writeJson during
	# this pass are left for the next pass, which listCompile starts once it
	# sees that the list has grown.
	for row in list[:]:
		if row[4]=='0':
			children=grabUrl(currentUrl,row[1])
			writeJson(row[1],children,0)
			row[4]='1'
	listCompile(list,list1)
	
# Shared accumulator of rows; writeJson appends to this module-level name.
list=[]
# Passed to listCompile for a length comparison only; it stays empty here, so
# any row fetched below makes listCompile call listShow.
list1=[]
# Endpoint that grabUrl is called with for every lookup in this script.
currentUrl="http://www.catalogueoflife.org/annual-checklist/2016/browse/tree/fetch/taxa"
# Seed the accumulator with the entries returned for id 0, stored with pid 0
# and status 0 (listShow expands rows whose status field is '0').
files=grabUrl(currentUrl,0)
writeJson(0,files,0)
# Write test1.txt and keep expanding through listShow while rows are added.
listCompile(list,list1)
#while (len(list)!=len(list1)):
	#if len(list)>len(list1):
	#	listShow(list)
	#else:
	#	listShow(list1)
	#fp=open("test1.txt","w")
	#fp.writelines(list)
	#fp.close()



		

